This document conducts an EDA on census data only, first at the tract level and then at the places level.
library(tidyverse)
## ── Attaching packages ──────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 2.2.1 ✔ purrr 0.2.4
## ✔ tibble 1.4.2 ✔ dplyr 0.7.4
## ✔ tidyr 0.8.0 ✔ stringr 1.3.0
## ✔ readr 1.1.1 ✔ forcats 0.3.0
## ── Conflicts ─────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(sf)
## Warning: package 'sf' was built under R version 3.4.4
## Linking to GEOS 3.6.1, GDAL 2.1.3, proj.4 4.9.3
library(spdep)
## Warning: package 'spdep' was built under R version 3.4.4
## Loading required package: sp
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following object is masked from 'package:tidyr':
##
## expand
## Loading required package: spData
## Warning: package 'spData' was built under R version 3.4.4
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source'))`
library(tidycensus)
## Warning: package 'tidycensus' was built under R version 3.4.4
data_dir = '~/Google Drive/Coding/EJ datasets/CA pesticide/'
tracts_sf = read_rds(str_c(data_dir, '02_tracts_sf.Rds')) %>%
## Remove tracts w/ total population or total employed 0
filter(total_popE != 0, total_employedE != 0) %>%
## Population proportions
mutate(whiteP = whiteE / total_popE,
whitePM = moe_prop(whiteE, total_popE, whiteM, total_popM),
blackP = blackE / total_popE,
blackPM = moe_prop(blackE, total_popE, blackM, total_popM),
indigenousP = indigenousE / total_popE,
indigenousPM = moe_prop(indigenousE, total_popE, indigenousM, total_popM),
asianP = asianE / total_popE,
asianPM = moe_prop(asianE, total_popE, asianM, total_popM),
hispanicP = hispanicE / total_popE,
hispanicPM = moe_prop(hispanicE, total_popE, hispanicM, total_popM),
noncitizensP = noncitizensE / total_popE,
noncitizensPM = moe_prop(noncitizensE, total_popE,
noncitizensM, total_popM),
childrenP = childrenE / total_popE,
childrenPM = moe_prop(childrenE, total_popE, childrenM, total_popM),
poverty_combE = povertyE + extreme_povertyE,
poverty_combM = sqrt(povertyM^2 + extreme_povertyM^2),
poverty_combP = poverty_combE / total_popE,
poverty_combPM = moe_prop(poverty_combE, total_popE,
poverty_combM, total_popM),
hisp_povertyP = hisp_povertyE / hispanicE,
hisp_povertyPM = moe_prop(hisp_povertyE, hispanicE, hisp_povertyM, hispanicM),
ag_employedP = ag_employedE / total_employedE,
ag_employedPM = moe_prop(ag_employedE, total_employedE, ag_employedM, total_employedM)
) %>%
## Population densities
mutate_at(vars(whiteE, blackE, indigenousE, asianE, hispanicE,
noncitizensE, childrenE, poverty_combE,
hisp_povertyP, ag_employedP),
funs(D = . / units::drop_units(area)))
## Warning in sqrt(x): NaNs produced
## Warning in sqrt(x): NaNs produced
## Warning in sqrt(x): NaNs produced
## Gives a warning about NaNs;
## but there aren't any in the output
# as.data.frame() %>%
# select(-geometry) %>%
# transmute_all(funs(is.nan)) %>%
# summarize_all(sum)
glimpse(tracts_sf)
## Observations: 1,044
## Variables: 65
## $ GEOID <chr> "06007000501", "06007000903", "06007003300", ...
## $ NAME <chr> "Census Tract 5.01, Butte County, California"...
## $ total_popE <dbl> 3928, 6473, 4544, 6215, 2912, 4845, 5536, 324...
## $ total_popM <dbl> 347, 767, 455, 261, 175, 477, 558, 341, 272, ...
## $ whiteE <dbl> 2743, 4200, 3140, 1717, 710, 32, 1832, 332, 5...
## $ whiteM <dbl> 367, 604, 405, 278, 109, 25, 322, 105, 109, 1...
## $ blackE <dbl> 24, 265, 159, 26, 436, 859, 157, 261, 160, 35...
## $ blackM <dbl> 28, 171, 159, 56, 95, 305, 175, 149, 96, 151,...
## $ indigenousE <dbl> 5, 62, 250, 75, 113, 66, 142, 81, 29, 89, 9, ...
## $ indigenousM <dbl> 8, 49, 136, 37, 44, 74, 113, 97, 26, 70, 15, ...
## $ asianE <dbl> 91, 569, 118, 143, 166, 1010, 63, 586, 549, 1...
## $ asianM <dbl> 77, 208, 100, 111, 54, 330, 87, 252, 295, 328...
## $ hispanicE <dbl> 834, 1050, 680, 4260, 1537, 2884, 3390, 2020,...
## $ hispanicM <dbl> 293, 236, 220, 280, 158, 483, 485, 386, 195, ...
## $ noncitizensE <dbl> 114, 330, 304, 1309, 286, 1113, 258, 384, 367...
## $ noncitizensM <dbl> 80, 128, 234, 290, 72, 324, 191, 129, 157, 32...
## $ childrenE <dbl> 173, 367, 216, 357, 0, 531, 528, 358, 271, 56...
## $ childrenM <dbl> 77, 136, 77, 142, 12, 164, 278, 123, 88, 179,...
## $ extreme_povertyE <dbl> 732, 588, 218, 144, 103, 1619, 950, 234, 325,...
## $ extreme_povertyM <dbl> 253, 241, 103, 111, 53, 482, 490, 135, 211, 5...
## $ povertyE <dbl> 291, 730, 771, 697, 269, 1352, 1023, 880, 462...
## $ povertyM <dbl> 158, 282, 304, 313, 65, 577, 391, 261, 245, 4...
## $ hisp_povertyE <dbl> 332, 323, 246, 738, 147, 1781, 1438, 717, 487...
## $ hisp_povertyM <dbl> 142, 154, 230, 327, 71, 504, 563, 257, 253, 5...
## $ total_employedE <dbl> 1963, 3207, 1433, 2775, 138, 1255, 2210, 1308...
## $ total_employedM <dbl> 253, 425, 242, 268, 62, 255, 397, 188, 175, 2...
## $ ag_employedE <dbl> 42, 60, 77, 794, 0, 247, 64, 26, 83, 24, 77, ...
## $ ag_employedM <dbl> 36, 37, 56, 196, 12, 136, 83, 24, 79, 38, 63,...
## $ area <S3: units> 5.4468555 km^2, 5.9151835 km^2, 309.810...
## $ densityE <dbl> 721.150027, 1094.302488, 14.667041, 6.526313,...
## $ densityM <dbl> 63.7064816, 129.6663075, 1.4686407, 0.2740736...
## $ density_log10 <dbl> 2.8580256, 3.0391374, 1.1663425, 0.8146679, 3...
## $ whiteP <dbl> 0.698319756, 0.648849065, 0.691021127, 0.2762...
## $ whitePM <dbl> 0.070170384, 0.052875339, 0.056179824, 0.0431...
## $ blackP <dbl> 0.006109980, 0.040939286, 0.034991197, 0.0041...
## $ blackPM <dbl> 0.007107845, 0.025968218, 0.034815337, 0.0090...
## $ indigenousP <dbl> 0.001272912, 0.009578248, 0.055017606, 0.0120...
## $ indigenousPM <dbl> 0.002033553, 0.007484341, 0.029418196, 0.0059...
## $ asianP <dbl> 0.02316701, 0.08790360, 0.02596831, 0.0230088...
## $ asianPM <dbl> 0.019495725, 0.030398513, 0.021852884, 0.0178...
## $ hispanicP <dbl> 0.21232179, 0.16221227, 0.14964789, 0.6854384...
## $ hispanicPM <dbl> 0.07219597, 0.03098106, 0.04603828, 0.0346572...
## $ noncitizensP <dbl> 0.02902240, 0.05098100, 0.06690141, 0.2106194...
## $ noncitizensPM <dbl> 0.02020458, 0.01882915, 0.05105890, 0.0458153...
## $ childrenP <dbl> 0.044042770, 0.056697049, 0.047535211, 0.0574...
## $ childrenPM <dbl> 0.019212858, 0.019907315, 0.016263200, 0.0227...
## $ poverty_combE <dbl> 1023, 1318, 989, 841, 372, 2971, 1973, 1114, ...
## $ poverty_combM <dbl> 298.28342, 370.95148, 320.97508, 332.09938, 8...
## $ poverty_combP <dbl> 0.26043788, 0.20361502, 0.21764965, 0.1353177...
## $ poverty_combPM <dbl> 0.07236859, 0.05198123, 0.06719105, 0.0531321...
## $ hisp_povertyP <dbl> 0.39808153, 0.30761905, 0.36176471, 0.1732394...
## $ hisp_povertyPM <dbl> 0.09711196, 0.12934693, 0.31733956, 0.0759113...
## $ ag_employedP <dbl> 0.021395823, 0.018709074, 0.053733426, 0.2861...
## $ ag_employedPM <dbl> 0.018130769, 0.011267702, 0.038010707, 0.0650...
## $ whiteE_D <dbl> 503.593311, 710.037147, 10.135235, 1.803005, ...
## $ blackE_D <dbl> 4.40621198, 44.79996283, 0.51321732, 0.027302...
## $ indigenousE_D <dbl> 0.91796083, 10.48150074, 0.80694547, 0.078756...
## $ asianE_D <dbl> 16.70688709, 96.19312773, 0.38087826, 0.15016...
## $ hispanicE_D <dbl> 153.115866, 177.509287, 2.194892, 4.473386, 1...
## $ noncitizensE_D <dbl> 20.92950691, 55.78863295, 0.98124569, 1.37456...
## $ childrenE_D <dbl> 31.7614447, 62.0437221, 0.6972009, 0.3748823,...
## $ poverty_combE_D <dbl> 187.8147857, 222.8164189, 3.1922763, 0.883126...
## $ hisp_povertyP_D <dbl> 0.0730846512, 0.0520049883, 0.0011676976, 0.0...
## $ ag_employedP_D <dbl> 3.928105e-03, 3.162890e-03, 1.734398e-04, 3.0...
## $ geometry <MULTIPOLYGON [m]> MULTIPOLYGON (((593948.5 44..., ...
## Distributions of proportions ----
tracts_sf %>%
as.data.frame() %>%
select(ends_with('P')) %>%
gather(key = variable, value) %>%
ggplot(aes(value, color = variable, fill = variable)) +
geom_density() +
geom_rug() +
facet_wrap(~ variable, scales = 'free') +
scale_x_continuous(labels = scales::percent_format())
There are a few tracts with a modest proportion of Asian and Black residents (\(> 20\%\)); but only a few. Almost no tracts have more than 5% Indigenous residents, and none have more than about 20%. Children are typically ~5-12% of the population. The poverty rate varies dramatically, with a median somewhere around 30% and some values above 50%. Hispanic and White proportions are the most diverse.
tracts_sf %>%
mutate(w_plus_h = whiteP + hispanicP) %>%
ggplot(aes(w_plus_h)) +
geom_density() +
geom_rug()
In most tracts, a supermajority of people are either White or Hispanic.
ggplot(tracts_sf, aes(hispanicP, poverty_combP)) +
geom_point() +
geom_smooth(method = 'lm')
A greater Hispanic proportion is associated with a greater poverty rate.
## Correlations ----
tracts_sf %>%
as_tibble() %>%
select(densityE, ends_with('P')) %>%
cor() %>%
as.data.frame() %>%
rownames_to_column(var = 'var1') %>%
as_tibble() %>%
gather(key = 'var2', value = 'cor', -var1) %>%
mutate(cor.print = round(cor, digits = 1)) %>%
ggplot(aes(var1, var2, fill = cor, label = cor.print)) +
geom_tile() +
geom_text() +
scale_fill_gradient2()
tracts_sf %>%
as_tibble() %>%
select(densityE, ends_with('P'), -whiteP) %>%
cor() %>%
as.data.frame() %>%
rownames_to_column(var = 'var1') %>%
as_tibble() %>%
gather(key = 'var2', value = 'cor', -var1) %>%
filter(abs(cor) > .4, var1 < var2) %>%
arrange(desc(abs(cor)))
## # A tibble: 12 x 3
## var1 var2 cor
## <chr> <chr> <dbl>
## 1 hispanicP noncitizensP 0.847
## 2 hisp_povertyP poverty_combP 0.842
## 3 ag_employedP noncitizensP 0.743
## 4 ag_employedP hispanicP 0.672
## 5 hispanicP poverty_combP 0.600
## 6 noncitizensP poverty_combP 0.598
## 7 childrenP hispanicP 0.560
## 8 childrenP poverty_combP 0.481
## 9 childrenP noncitizensP 0.470
## 10 hisp_povertyP noncitizensP 0.446
## 11 hisp_povertyP hispanicP 0.444
## 12 ag_employedP poverty_combP 0.420
White proportion has moderate to very strong negative corelations with every other variable (except Indigenous). Very strong correlation between Hispanic and noncitizen proportion and between Hispanic and general poverty. Strong correlations between agricultural employment and noncitizens and Hispanic. Moderate correlations between each pair of poverty, children, noncitizens, and Hispanic, and between agricultural employment and poverty.
ggplot(tracts_sf, aes((densityE), hispanicP)) +
geom_point() +
geom_smooth()
## `geom_smooth()` using method = 'gam'
No indication of a relationship between population density and Hispanic, linear or nonlinear.
## White/Hispanic segregation ----
## Evenness/dissimilarity
tracts_sf %>%
as.data.frame() %>%
mutate(w_h_dissim = abs(hispanicE/sum(hispanicE) - whiteE/sum(whiteE))) %>%
summarize(w_h_dissim = .5 * sum(w_h_dissim))
## w_h_dissim
## 1 0.4535279
Evenness is moderate, at 47%
ggplot(tracts_sf, aes(abs(hispanicP - whiteP))) +
geom_density() +
geom_rug()
## Exposure/interaction
tracts_sf %>%
as.data.frame() %>%
mutate(w_h_exposure = abs(whiteE/sum(whiteE) * hispanicE / total_popE),
h_w_exposure = abs(hispanicE/sum(hispanicE) * whiteE/total_popE)) %>%
summarize(w_h_exposure = sum(w_h_exposure),
h_w_exposure = sum(h_w_exposure))
## w_h_exposure h_w_exposure
## 1 0.3185166 0.2919732
Exposure is moderate-low, at about 30% in both directions
## Correlation
ggplot(tracts_sf, aes(hispanicP, whiteP)) +
geom_point()
with(tracts_sf, cor(hispanicP, whiteP))
## [1] -0.8582687
Very strong negative correlation between the two proportions. But I guess, in this kind of two-group context, we would get a very strong negative correlation even if dissimilarity were low and exposure were high.
tracts_sf %>%
as.data.frame() %>%
as_tibble() %>%
summarize_at(vars(whiteE, hispanicE), funs(sum)) %>%
mutate(hw_ratio = hispanicE / whiteE)
## # A tibble: 1 x 3
## whiteE hispanicE hw_ratio
## <dbl> <dbl> <dbl>
## 1 2142773. 2337573. 1.09
About 20% more Hispanics than Whites
## Spatial weights ----
coords_tracts = tracts_sf %>%
st_centroid() %>%
st_coordinates()
## Contiguity
weights_tracts_contig = tracts_sf %>%
pull(geometry) %>%
as_Spatial() %>%
poly2nb() %>%
nb2listw(style = 'W')
## KNN
weights_tracts_knn = 3:8 %>%
set_names() %>%
map(~ {knearneigh(coords_tracts, k = .x) %>%
knn2nb() %>%
nb2listw(style = 'W')})
## Inverse spatial weights w/in 50 km
dnn_tracts = dnearneigh(coords_tracts, d1 = 0, d2 = 50 * 1000)
weights_tracts_d = nbdists(dnn_tracts, coords = coords_tracts) %>%
map( ~ 1/.) %>%
nb2listw(dnn_tracts, glist = ., style = 'W',
zero.policy = TRUE)
weights_tracts = c(weights_tracts_knn,
contiguity = list(weights_tracts_contig),
distance = list(weights_tracts_d))
plot(tracts_sf, max.plot = 1)
plot(weights_tracts$contiguity, coords = coords_tracts,
add = TRUE, col = 'blue')
## Moran's I ----
moran.i = function(vec, weights, ...) {
moran.test(vec,
weights, ...)$estimate['Moran I statistic']
}
## Moran's I for overall density
moran_i_tracts = weights_tracts %>%
map_dfr(~moran.i(log10(tracts_sf$densityE), .)) %>%
gather(key = 'k', value = 'I')
moran_i_tracts
## # A tibble: 8 x 2
## k I
## <chr> <dbl>
## 1 3 0.458
## 2 4 0.449
## 3 5 0.446
## 4 6 0.433
## 5 7 0.423
## 6 8 0.411
## 7 contiguity 0.662
## 8 distance 0.260
Moderate population clustering, ~.40-.45 for KNN
weights_tracts %>%
tibble(weights = ., k = names(.)) %>%
crossing(tibble(variable = c('whiteE_D',
'blackE_D',
'indigenousE_D',
'asianE_D',
'hispanicE_D',
'noncitizensE_D',
'childrenE_D',
'poverty_combE_D'))) %>%
rowwise() %>%
mutate(var_value = {tracts_sf %>%
as.data.frame() %>%
pull(variable) %>%
{. + 1} %>% log10() %>%
list()},
moran_i = moran.i(var_value, weights)) %>%
select(k, variable, moran_i) %>%
arrange(desc(moran_i)) %>%
mutate(variable = fct_inorder(variable)) %>%
ggplot(aes(variable, moran_i, color = k, group = k)) +
geom_point() +
geom_line() +
geom_hline(data = moran_i_tracts,
aes(yintercept = I, color = k),
linetype = 'dashed') +
coord_flip()
The 6 KNN neighborings all give similar values of Moran’s \(I\), with slightly lower values as \(K\) increases. The dashed lines correspond to the values of \(I\) for total population density, calculated above. Distance-based weights have consistently lower values of Moran’s \(I\), but order the groups in basically the same way. Continuity weights have consistently higher values of I, with almost no difference between different groups.
Asian and black residents have moderate-high clustering. White, Hispanic, and noncitizen residents have moderate clustering. Children and impoverished residents seem to have clustering values the same as or just above the overall population. Indigenous people have weak positive clustering.
places_sf = read_rds(str_c(data_dir, '02_places_sf.Rds')) %>%
## There are 9 places with total population 0
filter(total_popE != 0) %>%
## Population proportions
mutate(whiteP = whiteE / total_popE,
whitePM = moe_prop(whiteE, total_popE, whiteM, total_popM),
blackP = blackE / total_popE,
blackPM = moe_prop(blackE, total_popE, blackM, total_popM),
indigenousP = indigenousE / total_popE,
indigenousPM = moe_prop(indigenousE, total_popE, indigenousM, total_popM),
asianP = asianE / total_popE,
asianPM = moe_prop(asianE, total_popE, asianM, total_popM),
hispanicP = hispanicE / total_popE,
hispanicPM = moe_prop(hispanicE, total_popE, hispanicM, total_popM),
noncitizensP = noncitizensE / total_popE,
noncitizensPM = moe_prop(noncitizensE, total_popE,
noncitizensM, total_popM),
childrenP = childrenE / total_popE,
childrenPM = moe_prop(childrenE, total_popE, childrenM, total_popM),
poverty_combE = povertyE + extreme_povertyE,
poverty_combM = sqrt(povertyM^2 + extreme_povertyM^2),
poverty_combP = poverty_combE / total_popE,
poverty_combPM = moe_prop(poverty_combE, total_popE,
poverty_combM, total_popM),
hisp_povertyP = hisp_povertyE / hispanicE,
hisp_povertyPM = moe_prop(hisp_povertyE, hispanicE, hisp_povertyM, hispanicM),
ag_employedP = ag_employedE / total_employedE,
ag_employedPM = moe_prop(ag_employedE, total_employedE, ag_employedM, total_employedM)) %>%
## Population densities
mutate_at(vars(whiteE, blackE, indigenousE, asianE, hispanicE,
noncitizensE, childrenE, poverty_combE,
hisp_povertyE, ag_employedE),
funs(D = . / units::drop_units(area)))
## Warning in sqrt(x): NaNs produced
## Warning in sqrt(x): NaNs produced
## Warning in sqrt(x): NaNs produced
## Warning in sqrt(x): NaNs produced
## Warning in sqrt(x): NaNs produced
## Warning in sqrt(x): NaNs produced
glimpse(places_sf)
## Observations: 397
## Variables: 66
## $ GEOID <chr> "0605290", "0619402", "0660984", "0675630", "...
## $ name <chr> "Benicia city, California", "Dixon city, Cali...
## $ county <chr> "Solano", "Solano", "Solano", "Solano", "Yuba...
## $ total_popE <dbl> 27657, 18908, 7826, 28867, 12161, 46952, 7032...
## $ total_popM <dbl> 44, 47, 28, 66, 40, 47, 30, 25, 55, 29, 32, 1...
## $ whiteE <dbl> 16809, 9523, 5646, 7393, 7316, 13366, 3649, 2...
## $ whiteM <dbl> 725, 848, 449, 693, 521, 909, 474, 544, 288, ...
## $ blackE <dbl> 1510, 405, 544, 6260, 642, 804, 103, 194, 19,...
## $ blackM <dbl> 402, 261, 238, 667, 284, 316, 138, 179, 20, 2...
## $ indigenousE <dbl> 161, 85, 0, 189, 105, 523, 54, 13, 308, 85, 9...
## $ indigenousM <dbl> 99, 99, 17, 109, 71, 225, 54, 21, 194, 64, 14...
## $ asianE <dbl> 3430, 477, 784, 5838, 704, 2699, 100, 165, 85...
## $ asianM <dbl> 459, 192, 247, 636, 209, 532, 104, 94, 319, 2...
## $ hispanicE <dbl> 4349, 7538, 716, 7231, 3010, 28503, 2868, 715...
## $ hispanicM <dbl> 520, 814, 298, 583, 413, 943, 438, 511, 349, ...
## $ noncitizensE <dbl> 1316, 2140, 242, 2714, 762, 7309, 542, 1926, ...
## $ noncitizensM <dbl> 285, 527, 141, 478, 309, 637, 236, 426, 303, ...
## $ childrenE <dbl> 1473, 1219, 256, 1972, 1038, 3919, 550, 991, ...
## $ childrenM <dbl> 253, 275, 136, 302, 199, 425, 195, 306, 165, ...
## $ extreme_povertyE <dbl> 973, 707, 391, 2105, 1297, 3141, 856, 634, 61...
## $ extreme_povertyM <dbl> 289, 359, 201, 633, 642, 598, 544, 360, 282, ...
## $ povertyE <dbl> 514, 1901, 522, 2019, 2019, 5122, 844, 1452, ...
## $ povertyM <dbl> 185, 768, 243, 538, 655, 922, 430, 694, 391, ...
## $ hisp_povertyE <dbl> 145, 1371, 66, 1794, 909, 5221, 1167, 1762, 1...
## $ hisp_povertyM <dbl> 90, 641, 82, 636, 382, 892, 583, 727, 410, 67...
## $ total_employedE <dbl> 13566, 8916, 2531, 13241, 4064, 18324, 2730, ...
## $ total_employedM <dbl> 479, 525, 276, 650, 401, 758, 324, 386, 221, ...
## $ ag_employedE <dbl> 101, 378, 86, 92, 104, 632, 194, 197, 414, 41...
## $ ag_employedM <dbl> 53, 169, 95, 70, 77, 172, 116, 140, 107, 205,...
## $ area <S3: units> 40.687430 km^2, 18.699253 km^2, 18.2224...
## $ densityE <dbl> 679.7431, 1011.1634, 429.4712, 2678.8501, 131...
## $ densityM <dbl> 1.0814151, 2.5134694, 1.5365696, 6.1247829, 4...
## $ density_log10 <dbl> 2.832345, 3.004821, 2.632934, 3.427948, 3.117...
## $ whiteP <dbl> 0.60776657, 0.50364925, 0.72144135, 0.2561055...
## $ whitePM <dbl> 0.026196140, 0.044831264, 0.057314767, 0.0239...
## $ blackP <dbl> 0.054597389, 0.021419505, 0.069511883, 0.2168...
## $ blackPM <dbl> 0.014534940, 0.013803578, 0.030410432, 0.0231...
## $ indigenousP <dbl> 0.005821311, 0.004495452, 0.000000000, 0.0065...
## $ indigenousPM <dbl> 0.003579552, 0.005235867, 0.002172246, 0.0037...
## $ asianP <dbl> 0.124019236, 0.025227417, 0.100178891, 0.2022...
## $ asianPM <dbl> 0.016594987, 0.010154238, 0.031559427, 0.0220...
## $ hispanicP <dbl> 0.15724771, 0.39866723, 0.09148991, 0.2504936...
## $ hispanicPM <dbl> 0.018800086, 0.043039154, 0.038076794, 0.0201...
## $ noncitizensP <dbl> 0.04758289, 0.11317961, 0.03092257, 0.0940173...
## $ noncitizensPM <dbl> 0.010304527, 0.027870380, 0.018016527, 0.0165...
## $ childrenP <dbl> 0.05325957, 0.06447007, 0.03271147, 0.0683133...
## $ childrenPM <dbl> 0.009147382, 0.014543225, 0.017377577, 0.0104...
## $ poverty_combE <dbl> 1487, 2608, 913, 4124, 3316, 8263, 1700, 2086...
## $ poverty_combM <dbl> 343.14137, 847.76471, 315.35694, 830.74244, 9...
## $ poverty_combP <dbl> 0.05376577, 0.13793103, 0.11666241, 0.1428620...
## $ poverty_combPM <dbl> 0.01240674, 0.04483499, 0.04029390, 0.0287764...
## $ hisp_povertyP <dbl> 0.03334100, 0.18187848, 0.09217877, 0.2480984...
## $ hisp_povertyPM <dbl> 0.02030681, 0.08273661, 0.10790802, 0.0856498...
## $ ag_employedP <dbl> 0.007445083, 0.042395693, 0.033978665, 0.0069...
## $ ag_employedPM <dbl> 0.003897972, 0.018789579, 0.037351236, 0.0052...
## $ whiteE_D <dbl> 413.12514, 509.27168, 309.83829, 686.06848, 7...
## $ blackE_D <dbl> 37.112199, 21.658619, 29.853353, 580.926374, ...
## $ indigenousE_D <dbl> 3.956996, 4.545636, 0.000000, 17.539151, 11.3...
## $ asianE_D <dbl> 84.301221, 25.509040, 43.023950, 541.764884, ...
## $ hispanicE_D <dbl> 106.88805, 403.11771, 39.29228, 671.03492, 32...
## $ noncitizensE_D <dbl> 32.344142, 114.443075, 13.280352, 251.858495,...
## $ childrenE_D <dbl> 36.202827, 65.189770, 14.048637, 183.001088, ...
## $ poverty_combE_D <dbl> 36.54691, 139.47081, 50.10315, 382.70613, 357...
## $ hisp_povertyE_D <dbl> 3.563754, 73.318437, 3.621914, 166.482734, 97...
## $ ag_employedE_D <dbl> 2.482339, 20.214711, 4.719464, 8.537576, 11.2...
## $ geometry <MULTIPOLYGON [m]> MULTIPOLYGON (((570293.6 42..., ...
## Distributions of proportions ----
places_sf %>%
as.data.frame() %>%
select(ends_with('P')) %>%
gather(key = variable, value) %>%
ggplot(aes(value, color = variable, fill = variable)) +
geom_density() +
geom_rug() +
facet_wrap(~ variable, scales = 'free') +
scale_x_continuous(labels = scales::percent_format())
## Warning: Removed 38 rows containing non-finite values (stat_density).
Slightly higher proportions across the board. But no dramatic differences.
places_sf %>%
mutate(w_plus_h = whiteP + hispanicP) %>%
ggplot(aes(w_plus_h)) +
geom_density() +
geom_rug()
Again, white+Hispanic supermajority
ggplot(places_sf, aes(hispanicP, poverty_combP)) +
geom_point() +
geom_smooth(method = 'lm')
Again, greater Hispanic proportion is associated with a greater poverty rate.
## Correlations ----
places_sf %>%
as_tibble() %>%
select(densityE, whiteP, blackP, childrenP, hispanicP, indigenousP, noncitizensP, poverty_combP, whiteP) %>%
cor() %>%
as.data.frame() %>%
rownames_to_column(var = 'var1') %>%
as_tibble() %>%
gather(key = 'var2', value = 'cor', -var1) %>%
mutate(cor.print = round(cor, digits = 1)) %>%
ggplot(aes(var1, var2, fill = cor, label = cor.print)) +
geom_tile() +
geom_text() +
scale_fill_gradient2()
places_sf %>%
as_tibble() %>%
select(densityE, ends_with('P'), -whiteP) %>%
cor() %>%
as.data.frame() %>%
rownames_to_column(var = 'var1') %>%
as_tibble() %>%
gather(key = 'var2', value = 'cor', -var1) %>%
filter(abs(cor) > .4, var1 < var2) %>%
arrange(desc(abs(cor)))
## # A tibble: 4 x 3
## var1 var2 cor
## <chr> <chr> <dbl>
## 1 hispanicP noncitizensP 0.849
## 2 hispanicP poverty_combP 0.606
## 3 noncitizensP poverty_combP 0.574
## 4 densityE hispanicP 0.480
White is still anticorrelated with everything except Indigenous. Strong or moderate correlations between noncitizens, poverty, or Hispanic. (Not children.) Moderate correlations between Hispanic and density, and moderate-weak between Hispanic and children and density and noncitizens.
ggplot(places_sf, aes((densityE), hispanicP)) +
geom_point() +
geom_smooth()
## `geom_smooth()` using method = 'loess'
Monotonic nonlinear relationship between density and Hispanic.
## White/Hispanic segregation ----
## Evenness/dissimilarity
places_sf %>%
as.data.frame() %>%
mutate(w_h_dissim = abs(hispanicE/sum(hispanicE) - whiteE/sum(whiteE))) %>%
summarize(w_h_dissim = .5 * sum(w_h_dissim))
## w_h_dissim
## 1 0.3576047
36% dissimilarity, lower than with tracts
ggplot(places_sf, aes(abs(hispanicP - whiteP))) +
geom_density() +
geom_rug()
## Exposure/interaction
places_sf %>%
as.data.frame() %>%
mutate(w_h_exposure = abs(whiteE/sum(whiteE) * hispanicE / total_popE),
h_w_exposure = abs(hispanicE/sum(hispanicE) * whiteE/total_popE)) %>%
summarize(w_h_exposure = sum(w_h_exposure),
h_w_exposure = sum(h_w_exposure))
## w_h_exposure h_w_exposure
## 1 0.3517999 0.3049761
Slightly higher White-Hispanic exposure, but still moderate-low
## Correlation
ggplot(places_sf, aes(hispanicP, whiteP)) +
geom_point()
with(tracts_sf, cor(hispanicP, whiteP))
## [1] -0.8582687
Very strong negative correlation between the two proportions. But I guess, in this kind of two-group context, we would get a very strong negative correlation even if dissimilarity were low and exposure were high.
places_sf %>%
as.data.frame() %>%
as_tibble() %>%
summarize_at(vars(whiteE, hispanicE), funs(sum)) %>%
mutate(hw_ratio = hispanicE / whiteE)
## # A tibble: 1 x 3
## whiteE hispanicE hw_ratio
## <dbl> <dbl> <dbl>
## 1 1787703. 2062174. 1.15
Hispanic-white ratio slightly higher, at 27%
## Spatial weights ----
library(spdep)
coords_places = places_sf %>%
st_centroid() %>%
st_coordinates()
## Contiguity
weights_places_contig = places_sf %>%
pull(geometry) %>%
as_Spatial() %>%
poly2nb() %>%
nb2listw(style = 'W', zero.policy = TRUE)
## KNN
weights_places_knn = 3:8 %>%
set_names() %>%
map(~ {knearneigh(coords_places, k = .x) %>%
knn2nb() %>%
nb2listw(style = 'W')})
## Inverse spatial weights w/in 50 km
dnn_places = dnearneigh(coords_places, d1 = 0, d2 = 50 * 1000)
weights_places_d = nbdists(dnn_places, coords = coords_places) %>%
map( ~ 1/.) %>%
nb2listw(dnn_places, glist = ., style = 'W', zero.policy = TRUE)
weights_places = c(weights_places_knn,
contiguity = list(weights_places_contig),
distance = list(weights_places_d))
plot(places_sf, max.plot = 1)
plot(weights_places$contiguity, coords = coords_places,
add = TRUE, col = 'blue')
## All systems of neighbors produce an archipelago of tight clusters and longer connections. Neither seems to produce ridiculously extended "neighbor" connections.
##
## Contiguity weights produce large numbers of islands: 246/397 (62%) have no neighbors
weights_places$contiguity$neighbours
## Neighbour list object:
## Number of regions: 397
## Number of nonzero links: 242
## Percentage nonzero weights: 0.1535445
## Average number of links: 0.6095718
## 246 regions with no links:
## ID2 ID3 ID5 ID7 ID8 ID9 ID12 ID13 ID14 ID15 ID21 ID22 ID23 ID25 ID27 ID33 ID34 ID35 ID36 ID37 ID41 ID42 ID44 ID45 ID49 ID50 ID51 ID52 ID53 ID54 ID55 ID56 ID57 ID58 ID59 ID60 ID61 ID62 ID63 ID64 ID65 ID66 ID67 ID68 ID74 ID75 ID77 ID78 ID80 ID81 ID82 ID85 ID86 ID87 ID88 ID90 ID91 ID92 ID93 ID97 ID98 ID99 ID101 ID102 ID104 ID106 ID107 ID110 ID111 ID112 ID113 ID114 ID115 ID117 ID118 ID119 ID121 ID123 ID124 ID125 ID130 ID131 ID132 ID133 ID135 ID136 ID137 ID139 ID140 ID141 ID142 ID143 ID144 ID146 ID148 ID149 ID150 ID152 ID153 ID154 ID155 ID157 ID158 ID159 ID160 ID161 ID162 ID163 ID166 ID169 ID170 ID172 ID173 ID174 ID175 ID177 ID178 ID180 ID183 ID184 ID185 ID186 ID188 ID193 ID194 ID195 ID200 ID201 ID205 ID206 ID207 ID208 ID210 ID211 ID212 ID214 ID217 ID222 ID225 ID226 ID227 ID228 ID229 ID233 ID235 ID236 ID238 ID239 ID240 ID243 ID244 ID245 ID246 ID247 ID249 ID250 ID251 ID253 ID254 ID257 ID259 ID260 ID262 ID264 ID265 ID266 ID268 ID270 ID271 ID272 ID273 ID275 ID278 ID279 ID282 ID283 ID284 ID287 ID294 ID298 ID300 ID301 ID304 ID305 ID306 ID307 ID308 ID309 ID310 ID312 ID313 ID315 ID316 ID317 ID319 ID320 ID321 ID323 ID325 ID326 ID327 ID328 ID333 ID334 ID335 ID337 ID338 ID339 ID340 ID341 ID345 ID346 ID347 ID349 ID350 ID351 ID352 ID353 ID354 ID355 ID357 ID361 ID362 ID363 ID365 ID366 ID367 ID368 ID369 ID370 ID371 ID372 ID374 ID377 ID378 ID379 ID380 ID381 ID382 ID385 ID386 ID389 ID393 ID395 ID396 ID397
## Moran's I ----
moran.i = function(vec, weights, ...) {
moran.test(vec,
weights, ...)$estimate['Moran I statistic']
}
## Moran's I for overall density
moran_i_places = weights_places %>%
map_dfr(~moran.i(log10(places_sf$densityE), .,
zero.policy = TRUE)) %>%
gather(key = 'k', value = 'I')
moran_i_places
## # A tibble: 8 x 2
## k I
## <chr> <dbl>
## 1 3 0.532
## 2 4 0.507
## 3 5 0.493
## 4 6 0.479
## 5 7 0.463
## 6 8 0.442
## 7 contiguity 0.284
## 8 distance 0.405
Higher moderate population clustering, .45-.53. Distance weights are more consistent w/ KNN here. Contiguity weights are much lower.
weights_places %>%
tibble(weights = ., k = names(.)) %>%
crossing(tibble(variable = c('whiteE_D',
'blackE_D',
'indigenousE_D',
'asianE_D',
'hispanicE_D',
'noncitizensE_D',
'childrenE_D',
'poverty_combE_D'))) %>%
rowwise() %>%
mutate(var_value = {places_sf %>%
as.data.frame() %>%
pull(variable) %>%
{. + 1} %>% log10() %>%
list()},
moran_i = moran.i(var_value, weights,
zero.policy = TRUE)) %>%
select(k, variable, moran_i) %>%
arrange(desc(moran_i)) %>%
mutate(variable = fct_inorder(variable)) %>%
ggplot(aes(variable, moran_i, color = k, group = k)) +
geom_point() +
geom_line() +
geom_hline(data = moran_i_places,
aes(yintercept = I, color = k),
linetype = 'dashed') +
coord_flip()
With places, most groups have low and below-average clustering. Impoverished people, noncitizens, and Hispanics have moderate clustering, and Hispanics and noncitizens are above the overall average. Distance values are generally similar to but a bit lower than the KNN. Contiguity values are generally quite a bit lower.